import pydata_google_auth
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
import math
from datetime import timedelta
from pytz import timezone, utc
from scipy import stats
import plotly.express as px
import plotly.offline as pyo
from fpdf import FPDF
# Report configuration shared by the queries and charts in this script.
#   project_id            - project whose events are analysed
#   start/end_date_kst    - inclusive report window, given as KST calendar dates
#   start/end_date_utc    - left empty here; not read by the code in this file
#   n                     - task-count threshold used by the worker filters
params = dict(
    project_id=7807,
    start_date_kst='2021-05-01',
    end_date_kst='2021-06-26',
    start_date_utc='',
    end_date_utc='',
    n=20,
)
def getBigQueryData(params):
    """Run the four BigQuery reports for one project and return them as DataFrames.

    Every query scans `crowdworks-platform.crowdworks_event.cw_event_data`
    restricted to one project and one date window. The window is given as KST
    calendar dates and shifted by -9 hours before being compared with the
    table's `day` / `dt` columns.

    Args:
        params: mapping with the keys
            'project_id'      - project to report on,
            'start_date_kst'  - first day of the window, 'YYYY-MM-DD',
            'end_date_kst'    - last day of the window (inclusive), 'YYYY-MM-DD',
            'n'               - minimum number of finished tasks for a worker
                                to be counted in the funnel's last stage.

    Returns:
        A tuple ``(df, df2, df3, df4)``:
            df  - one row per worker: first-pass / rework counts and times,
                  check and reject counts and rates, IQR-filtered first-pass
                  stats, plus worker name and login id from the profile table.
            df2 - raw events (day, dt, action, worker_id, project_id, data_id)
                  ordered by dt.
            df3 - one row per data_id with its number of 'check_reject' events.
            df4 - one row per project: distinct members with a 'work_start'
                  (s1), distinct members with a 'work_end' (s2) and workers
                  with at least ``n`` distinct 'all_finished' data ids (s3).

    Raises:
        KeyError: if one of the required keys is missing from ``params``.
    """
    bq_project = 'crowdworks-platform'

    # WHERE fragment shared by every scan of cw_event_data: the KST window
    # shifted by -9h, plus the project / non-empty id filters.
    event_window = """
                day >= DATE(DATETIME_ADD('{start_date_kst} 00:00:00', INTERVAL -9 HOUR))
                AND dt >= DATETIME(DATETIME_ADD('{start_date_kst} 00:00:00', INTERVAL -9 HOUR))
                AND day <= DATE(DATETIME_ADD('{end_date_kst} 23:59:59', INTERVAL -9 HOUR))
                AND dt <= DATETIME(DATETIME_ADD('{end_date_kst} 23:59:59', INTERVAL -9 HOUR))
                AND project_id = '{project_id}'
                AND worker_id <> ''
                AND data_id <> ''
    """.format(
        project_id=params['project_id'],
        start_date_kst=params['start_date_kst'],
        end_date_kst=params['end_date_kst'],
    )

    # One row per (project, data, worker, work_data_seq) with the earliest
    # start event and the latest end event. Used twice in the main query:
    # once for the per-worker stats and once for the project-wide quartiles.
    task_spans = """
            SELECT
                MAX(dt) as dt
                ,project_id
                ,data_id
                ,worker_id
                ,MIN(IF(action='work_start' or action='rework_start', dt, null)) as s_dt
                ,MAX(IF(action='work_end' or action='rework_end', dt, null)) as e_dt
                ,work_data_seq
            FROM `crowdworks-platform.crowdworks_event.cw_event_data`
            WHERE
                {event_window}
                AND action in ('work_start','work_end', 'rework_start', 'rework_end')
            GROUP BY project_id,data_id,worker_id,work_data_seq
    """.format(event_window=event_window)

    # Per-worker statistics. The check-event subquery (A) previously compared
    # the raw KST date strings with the UTC columns and ended at midnight of
    # the end date, dropping the last day; it now uses the same window as
    # every other scan.
    query = """
    SELECT
        R.*,
        M.member_nm as worker_name,
        M.login_id as worker_login_id
    FROM (SELECT
            CAST(W.project_id as INT64) as project_id,
            CAST(W.worker_id as INT64) as worker_id,
            CAST(W.first_work_cnt as INT64) as first_work_cnt,
            CAST(W.first_work_time as INT64) as first_work_time,
            CAST(W.first_work_avg_time as FLOAT64) as first_work_avg_time,
            CAST(W.re_work_cnt as INT64) as re_work_cnt,
            CAST(W.re_work_time as INT64) as re_work_time,
            CAST(W.re_work_avg_time as FLOAT64) as re_work_avg_time,
            CAST(W.work_tot_cnt as INT64) as work_tot_cnt,
            IFNULL(C.check_cnt,0) as check_cnt, -- 검수 진행된 작업 수
            IFNULL(C.reject_cnt,0) as reject_cnt, -- 누적 반려 수
            ROUND(IFNULL(SAFE_DIVIDE(IFNULL(C.check_cnt,0),CAST(W.first_work_cnt as INT64)),0) * 100, 2) as check_rate_per, -- 검수진행률
            ROUND(IFNULL(SAFE_DIVIDE(IFNULL(C.reject_cnt,0),CAST(C.check_cnt as INT64)),0) * 100, 2) as reject_rate_per, -- 반려율
            iqr_work_cnt,
            iqr_work_avg_time
        FROM
        (
            SELECT
                project_id,
                worker_id,
                sum(if(work_data_seq='1', 1, 0)) as first_work_cnt, -- 최초 작업 수
                sum(if(work_data_seq='1', diff, 0)) as first_work_time, -- 최초 작업시간 합
                round(ifnull(SAFE_DIVIDE(sum(if(work_data_seq='1', diff, 0)), sum(if(work_data_seq='1', 1, 0))),0), 2) as first_work_avg_time, -- 평균
                sum(if(work_data_seq <> '1', 1, 0)) as re_work_cnt, -- 재작업 수
                sum(if(work_data_seq <> '1', diff, 0)) as re_work_time, -- 재작업 시간 합
                round(ifnull(SAFE_DIVIDE(sum(if(work_data_seq<>'1', diff, 0)), sum(if(work_data_seq<>'1', 1, 0))),0), 2) as re_work_avg_time, -- 재작업시간 평균
                count(data_id) as work_tot_cnt,
                sum(if(work_data_seq='1' and diff >= percentile_25 and diff <= percentile_75, 1, 0)) as iqr_work_cnt, -- 최초 작업 수
                round(ifnull(SAFE_DIVIDE(sum(if(work_data_seq='1' and diff >= percentile_25 and diff <= percentile_75, diff, 0)), sum(if(work_data_seq='1' and diff >= percentile_25 and diff <= percentile_75, 1, 0))),0), 2) as iqr_work_avg_time -- 평균
            FROM (
                SELECT
                    datetime(cast(dt as timestamp), 'Asia/Seoul') as dt,
                    project_id,
                    data_id,
                    worker_id,
                    datetime(cast(s_dt as timestamp), 'Asia/Seoul') as s_dt,
                    datetime(cast(e_dt as timestamp), 'Asia/Seoul') as e_dt,
                    DATETIME_DIFF(e_dt, s_dt, SECOND) as diff,
                    work_data_seq,
                    percentile_25,
                    percentile_75
                FROM(
                    {task_spans}
                ),
                (
                    SELECT
                        percentile_25,
                        percentile_75,
                        (percentile_75 - percentile_25) as iqr
                    FROM (
                        SELECT
                            APPROX_QUANTILES(diff, 4)[OFFSET (1)] AS percentile_25,
                            APPROX_QUANTILES(diff, 4)[OFFSET (3)] AS percentile_75
                        FROM (
                            SELECT
                                datetime(cast(dt as timestamp), 'Asia/Seoul') as dt,
                                project_id,
                                data_id,
                                worker_id,
                                datetime(cast(s_dt as timestamp), 'Asia/Seoul') as s_dt,
                                datetime(cast(e_dt as timestamp), 'Asia/Seoul') as e_dt,
                                DATETIME_DIFF(e_dt, s_dt, SECOND) as diff,
                                work_data_seq
                            FROM (
                                {task_spans}
                            )
                            WHERE s_dt is not null AND e_dt is not null AND work_data_seq='1'
                        )
                    )
                )
                WHERE s_dt is not null AND e_dt is not null
            )
            GROUP BY 1,2
        ) W
        LEFT JOIN
        (
            SELECT
                project_id,
                worker_id,
                count(data_id) as check_cnt,
                SUM(reject_cnt) as reject_cnt
            FROM (
                SELECT
                    project_id,
                    worker_id,
                    data_id,
                    SUM(reject) as reject_cnt
                FROM (
                    SELECT
                        A.action as action,
                        A.dt as dt,
                        A.project_id as project_id,
                        A.data_id as data_id,
                        A.worker_id as worker_id,
                        A.work_data_seq as work_data_seq,
                        B.rank as rank,
                        if(a.action = 'check_reject', 1, 0) as reject
                    FROM
                    (SELECT
                        action,
                        dt,
                        project_id,
                        data_id,
                        worker_id,
                        work_data_seq,
                    FROM `crowdworks-platform.crowdworks_event.cw_event_data`
                    WHERE
                        {event_window}
                        AND checker_id <> ''
                        AND action in ('check_end','check_reject')) A
                    LEFT JOIN
                    (SELECT
                        action,
                        dt,
                        project_id,
                        data_id,
                        worker_id,
                        work_data_seq,
                        RANK() OVER (PARTITION BY worker_id ORDER BY dt) as rank
                    FROM `crowdworks-platform.crowdworks_event.cw_event_data`
                    WHERE
                        {event_window}
                        AND action in ('work_end','rework_end')) B
                    ON (A.data_id = B.data_id AND A.worker_id = B.worker_id AND A.work_data_seq = B.work_data_seq)
                )
                GROUP BY 1,2,3
            )
            GROUP BY 1,2
        ) C
        ON (W.project_id = C.project_id AND W.worker_id = C.worker_id)
    ) R
    LEFT JOIN `crowdworks-platform.dgwp_1st_party.works_user_profile` M
    ON (M.member_id = R.worker_id)
    """.format(event_window=event_window, task_spans=task_spans)
    df = pd.read_gbq(query=query, project_id=bq_project, dialect='standard')

    # Raw event log for the window, ordered by event time.
    query2 = """
    SELECT
        day,
        dt,
        action,
        worker_id,
        project_id,
        data_id
    FROM `crowdworks-platform.crowdworks_event.cw_event_data`
    WHERE
        {event_window}
    ORDER BY dt
    """.format(event_window=event_window)
    df2 = pd.read_gbq(query=query2, project_id=bq_project, dialect='standard')

    # Number of rejections per data item (among check_end / check_reject events).
    query3 = """
    SELECT
        project_id,
        data_id,
        sum(if(action = 'check_reject', 1, 0)) as check_reject_cnt,
    FROM `crowdworks-platform.crowdworks_event.cw_event_data`
    WHERE
        {event_window}
        AND action in ('check_end','check_reject')
    GROUP BY 1,2
    """.format(event_window=event_window)
    df3 = pd.read_gbq(query=query3, project_id=bq_project, dialect='standard')

    # Funnel counts: each UNION ALL branch fills one of s1/s2/s3 and leaves the
    # others at 0; the outer MAX collapses them into a single row per project.
    query4 = """
    with d as (SELECT
        project_id,
        member_id,
        worker_id,
        action,
        data_id
    FROM `crowdworks-platform.crowdworks_event.cw_event_data`
    WHERE
        {event_window}
        AND action in ('work_start', 'work_end','all_finished'))
    select
        project_id,
        MAX(s1) as s1,
        MAX(s2) as s2,
        MAX(s3) as s3
    from
    (
        select
            project_id,
            count(distinct member_id) as s1,
            0 as s2,
            0 as s3
        from d
        where action='work_start'
        group by 1
        union all
        select
            project_id,
            0 as s1,
            count(distinct member_id) as s2,
            0 as s3
        from d
        where action='work_end'
        group by 1
        union all
        select
            project_id,
            0 as s1,
            0 as s2,
            count(distinct worker_id) as s3
        from
        (select
            project_id,
            worker_id,
            count(distinct data_id) data_cnt
        from d
        where action='all_finished'
        group by 1,2
        having data_cnt >= {n})
        group by 1
    )
    group by 1
    """.format(event_window=event_window, n=params['n'])
    df4 = pd.read_gbq(query=query4, project_id=bq_project, dialect='standard')

    return df, df2, df3, df4
import os

import pymysql

# Connection settings for the project metadata database. Each value can be
# overridden through the environment; the literals are kept only as fallbacks
# so existing runs keep working.
# NOTE(security): a password is committed in this file - rotate it and supply
# CW_DB_PASSWORD from the environment instead of relying on the fallback.
conn = pymysql.connect(
    host=os.environ.get('CW_DB_HOST', 'db-1ea6m.pub-cdb.ntruss.com'),
    port=int(os.environ.get('CW_DB_PORT', 3306)),
    user=os.environ.get('CW_DB_USER', 'ybkim'),
    passwd=os.environ.get('CW_DB_PASSWORD', 'm883jPts'),
    db=os.environ.get('CW_DB_NAME', 'cwaidata'),
)
try:
    # The project id is passed as a bound parameter (as a string, matching the
    # quoted literal the query used before) instead of being formatted into
    # the SQL text.
    _project_id_param = (str(params['project_id']),)
    project_info = pd.read_sql_query(
        """
SELECT *
FROM CW_PROJECT
WHERE project_id = %s
""",
        conn,
        params=_project_id_param,
    )
    project_info2 = pd.read_sql_query(
        """
SELECT *
FROM TB_PRJ_MST
WHERE project_id = %s
""",
        conn,
        params=_project_id_param,
    )
finally:
    # Nothing later in the script uses the connection, so release it here.
    conn.close()
# Fetch the four BigQuery result sets for the configured project and window.
df, df2, df3, df4 = getBigQueryData(params)
# Notebook display: number of 'work_end' events per day.
df2.loc[df2['action'] == 'work_end'].groupby('day')['data_id'].count()
# Output (work_end events per day):
# day
# 2021-06-03      457
# 2021-06-04      755
# 2021-06-05      190
# 2021-06-06       65
# 2021-06-07      666
# 2021-06-08      212
# 2021-06-09     1298
# 2021-06-10      796
# 2021-06-11    13106
# 2021-06-12    14275
# 2021-06-13    10378
# 2021-06-14     8951
# 2021-06-19        2
# Name: data_id, dtype: int64
# Set the default figure size BEFORE creating the figure: rcParams only affect
# figures created afterwards, so setting it after plt.subplots() left the
# overview chart at matplotlib's default size on a fresh run.
plt.rcParams['figure.figsize'] = [15, 5]
fig, axs = plt.subplots(1, 2)
fig.subplots_adjust(wspace=.5)
# Left panel: group by day and count the distinct worker ids.
df2.groupby('day')['worker_id'].nunique().plot(kind='line', title='number of workers by day', ax=axs[0])
# Right panel: keep only 'work_end' events, group by day and count the data rows.
df2[df2['action']=='work_end'].groupby('day').count()['data_id'].plot(kind='line', title='number of tasks completed by day', ax=axs[1])
plt.savefig('overview.jpg')
# Notebook display of the funnel counts.
df4
# Output (df4):
# |   | s1   | s2  | s3 |
# |---|------|-----|----|
# | 0 | 1494 | 929 | 0  |
# Funnel chart of the three stage counts returned in df4 (s1, s2, s3).
pyo.init_notebook_mode()
# Only the stage counts are plotted, so drop the project id column.
df4 = df4.drop(columns='project_id')
# First (only) row of df4 holds the counts; pair each with its stage label.
new_df4 = pd.DataFrame({
    'number': df4.values[0],
    'stage': ['enter_num', 'trial_num', 'worker_num'],
})
fig = px.funnel(new_df4, x='number', y='stage')
fig.show()
fig.write_image("funnel.jpg")
# Keep 'work_end' events, group by day and worker, and count how many tasks
# each worker submitted on each day (the count lands in column 0).
# (The stray display expression that used data_cnt_by_day_and_worker before it
# was defined has been dropped; it raised NameError when run as a script.)
data_cnt_by_day_and_worker = df2[df2['action']=='work_end'].groupby(['day', 'worker_id']).size().reset_index()
# Keep the (day, worker) rows with at least n tasks. '>=' matches the chart
# title ("at Least n") and the `data_cnt >= n` filter in the funnel query.
active_rows = data_cnt_by_day_and_worker[data_cnt_by_day_and_worker[0] >= params['n']]
# Select the count column before aggregating so the non-numeric worker_id
# column is never aggregated.
active_worker_count_day = active_rows.groupby('day')[0].count()
title = 'Count of Workers that Submited at Least ' + str(params['n']) + ' tasks'
# Start a fresh figure so this chart is not drawn over the previous one when
# the file runs as a plain script.
plt.figure()
active_worker_count_day.plot(kind='line', title=title)
plt.ylabel('Active Worker Count')
plt.savefig('active_worker_count.jpg')
# Mean number of tasks per active worker, per day.
average_work_count_per_worker_day = active_rows.groupby('day')[0].mean()
plt.figure()
average_work_count_per_worker_day.plot(kind='line', title = 'Daily Average Count of Tasks Submitted by Active Worker')
plt.ylabel('Task Count')
plt.savefig('daily_average_task_count_by_worker.jpg')
# Histogram of each worker's average first-pass task time, in 60-wide bins
# (the query computes these times with DATETIME_DIFF(..., SECOND)).
binwidth = 60
first_work_avg_time = df['first_work_avg_time']
# Fresh figure so the histogram is not drawn over the previous chart when the
# file runs as a plain script.
plt.figure()
first_work_avg_time.plot(
    kind='hist',
    bins=np.arange(first_work_avg_time.min(), first_work_avg_time.max() + binwidth, binwidth),
)
# NOTE: despite the "average" wording in the label, this is the median of the
# per-worker averages.
work_avg_speed = first_work_avg_time.median()
print('작업 평균 스피드: ' + str(int(work_avg_speed/60)) + '분 ' + str(int(work_avg_speed%60)) + '초')
plt.savefig('work_speed.jpg')
# Output: 작업 평균 스피드: 2분 8초
# There may be a data-volume problem here; it is probably better to ask
# Jeongsik for a dedicated query.
# Fresh figure so the bars are not drawn over the previous chart when the file
# runs as a plain script.
plt.figure()
# Number of workers for each first-pass task count.
df.first_work_cnt.value_counts().sort_index().plot(kind='bar', title='Number of Tasks Completed by Worker Distribution')
plt.xlabel('Number of Tasks Completed')
plt.ylabel('Worker Count')
plt.savefig('num_task_completed.jpg')
# Per-worker rework rate: rework count divided by reject count.
df['re_work_rate'] = df['re_work_cnt']/df['reject_cnt']
# Fresh figure so the bars are not drawn over the previous chart when the file
# runs as a plain script.
plt.figure()
# dropna() removes workers whose rate is undefined (NaN) before counting how
# many workers share each rate value.
df['re_work_rate'].dropna().value_counts().sort_index().plot(kind='bar', title='Rework Rate by Worker Distribution')
plt.xlabel('Rework Rate')
plt.ylabel('Worker Count')
plt.savefig('rework_rate.jpg')
# Overall rate across all workers; the name is then reused for the report line.
total_re_work_rate = df['re_work_cnt'].sum()/df['reject_cnt'].sum()
total_re_work_rate = 'Total Rework Rate: ' + str(total_re_work_rate)
print(total_re_work_rate)
# Output: Total Rework Rate: 0.9
# Fresh figure so the bars are not drawn over the previous chart when the file
# runs as a plain script.
plt.figure()
# Number of data items for each rejection count.
df3['check_reject_cnt'].value_counts().sort_index().plot(kind='bar', title='Reject Count Distribution')
plt.xlabel('Number of Rejections')
plt.ylabel('Data Count')
plt.savefig('check_reject_distribution.jpg')
# Select the workers who completed at most n first-pass tasks and keep their
# worker ids as a tuple of strings (single pass; works for an empty selection).
worker_id = tuple(str(x) for x in df[df['first_work_cnt'] <= params['n']]['worker_id'].values)
if worker_id:
    # Render the ids as an explicit quoted SQL list. Formatting the Python
    # tuple directly produced invalid SQL for a single id: ('123',).
    worker_id_sql = ', '.join("'{}'".format(w) for w in worker_id)
    # TODO: start_date and end_date should come from input values instead of
    # being hard-coded here.
    query = """
    SELECT worker_id, project_id, dt
    FROM `crowdworks-platform.crowdworks_event.cw_event_data`
    WHERE day BETWEEN "2021-06-03" AND "2021-06-10"
    AND worker_id IN ({worker_id})
    AND action IN ('work_end')
    """.format(worker_id=worker_id_sql)
    raw_df = pd.read_gbq(query = query, project_id = 'crowdworks-platform', dialect='standard')
else:
    # No matching workers: an empty IN () list is invalid SQL, so skip the
    # query and continue with an empty result of the same shape.
    raw_df = pd.DataFrame(columns=['worker_id', 'project_id', 'dt'])
# For every worker, the set of projects in which they have a 'work_end' event.
tracker_df = raw_df.groupby('worker_id')['project_id'].apply(set)
tracker_df = pd.DataFrame(tracker_df)
tracker_df.head()
# Output (tracker_df.head()):
# | worker_id | project_id               |
# |-----------|--------------------------|
# | 105542    | {7852, 7998, 7972, 7866} |
# | 110540    | {7902}                   |
# | 112821    | {8000}                   |
# | 118711    | {8000, 7889, 7807}       |
# | 12328     | {7902}                   |
from collections import Counter

# Count, per project id, how many tracked workers have that project in their set.
counter = Counter()
for worker_projects in tracker_df['project_id']:
    counter.update(worker_projects)
# Data to plot: one pie slice per project, sized by its worker count.
labels = list(counter.keys())
sizes = list(counter.values())
# Fresh figure so the pie is not drawn over the previous chart when the file
# runs as a plain script.
plt.figure()
plt.pie(sizes, labels=labels, autopct='%.2f')
plt.axis('equal')
plt.savefig('worker_tracker_pie.jpg')
# Text lines for the PDF report, built from the first row of the two project
# metadata tables and from the statistics computed above.
project_name = "Project Name: " + project_info['project_name'][0]
data_type = "Data Type: " + project_info['source_type_cd'][0]
project_type = "Project Type: " + project_info['template_type'][0]
reward_worker = "Worker Unit Price: " + str(project_info2['reward_worker'][0])
reward_checker = "Checker Unit Price: " + str(project_info2['reward_checker'][0])
task_limit = "Task Data Limit: " + str(project_info2['work_cnt_limit'][0])
start_date = "Start Date: " + project_info['start_date'][0].strftime('%m/%d/%Y')
end_date = "End Date: " + project_info['end_date'][0].strftime('%m/%d/%Y')
# Tasks per hour at the median task time, multiplied by the per-task reward.
worker_salary = "Average Worker Salary: " + str((3600/work_avg_speed)*project_info2['reward_worker'][0])
total_data_cnt = "Total Data Count: " + str(project_info['total_src_cnt_pointer'][0])
# Despite the name, this is the full span from project start to project end.
days_left = (project_info['end_date'][0] - project_info['start_date'][0]).days
target_daily_rate = 'Target Daily Rate: ' + str(int(project_info['total_src_cnt_pointer'][0]/days_left))
# 'work_end' events per day, computed once.
daily_work_end_counts = df2[df2['action']=='work_end'].groupby('day')['dt'].count()
row_num = daily_work_end_counts.shape[0]
# Second-to-last day with data, taken by position. The index holds days, not
# integers, so use .iloc instead of the deprecated positional fallback of [].
# NOTE(review): presumably the last day is skipped because it may be incomplete - confirm.
actual_daily_rate = 'Current Daily Rate: ' + str(daily_work_end_counts.iloc[row_num-2])
# Assemble the PDF report from the text lines and chart images produced above.
# NOTE(review): several headings are positioned with a y far below the page
# (270, 540, 810, 1090). With FPDF's automatic page break this appears to make
# the following cell() start a new page, so it seems to be used as a page
# break - confirm before changing those coordinates.
# All cells use align 'L': the 'B' used before is not an FPDF align code
# (PyFPDF silently rendered it left-aligned).
pdf = FPDF()
pdf.add_page()
pdf.set_xy(80, 5)
# Title
pdf.set_font('arial', 'B', 16)
pdf.cell(75, 10, "Project Analysis", 0, 2, 'L')
# 1. Overview
pdf.set_xy(20, 20)
pdf.set_font('arial', 'B', 16)
pdf.cell(75, 10, "1. Overview", 0, 2, 'L')
# Basic project information
pdf.set_font('arial', 'B', 10)
pdf.set_xy(20, 35)
pdf.cell(0, 5, "Project Info", 0, 2, 'L')
pdf.set_font('arial', '', 10)
# Print the computed project name line (previously only the bare label was
# printed and project_name was never used).
pdf.cell(0, 5, project_name, 0, 2, 'L')
pdf.cell(0, 5, project_type, 0, 2, 'L')
pdf.cell(0, 5, data_type, 0, 2, 'L')
pdf.cell(0, 5, total_data_cnt, 0, 2, 'L')
pdf.cell(0, 5, reward_worker, 0, 2, 'L')
pdf.cell(0, 5, reward_checker, 0, 2, 'L')
pdf.cell(0, 5, task_limit, 0, 2, 'L')
pdf.cell(0, 5, start_date, 0, 2, 'L')
pdf.cell(0, 5, end_date, 0, 0, 'L')
# Work completion rate
pdf.set_xy(85, 35)
pdf.set_font('arial', 'B', 10)
pdf.cell(0, 5, "Work Rate", 0, 2, 'L')
pdf.set_font('arial', '', 10)
pdf.cell(0, 5, target_daily_rate, 0, 2, 'L')
pdf.cell(0, 5, actual_daily_rate, 0, 2, 'L')
# Points
pdf.set_xy(85, 55)
pdf.set_font('arial', 'B', 10)
pdf.cell(0, 5, "Point", 0, 2, 'L')
pdf.set_font('arial', '', 10)
pdf.cell(0, 5, "Total Points: ", 0, 2, 'L')
pdf.cell(0, 5, worker_salary, 0, 2, 'L')
# Progress charts
pdf.set_xy(0, 90)
pdf.image('overview.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
# Funnel
pdf.set_xy(40, 170)
pdf.image('funnel.jpg', x = None, y = None, w = 125, h = 0, type = '', link = '')
# 2. Difficulty level
pdf.set_xy(20, 270)
pdf.set_font('arial', 'B', 16)
pdf.cell(75, 10, "2. Difficulty Level", 0, 2, 'L')
# Work speed
pdf.set_font('arial', '', 12)
average_speed = 'Average work speed: ' + str(int(work_avg_speed/60)) + ' min ' + str(int(work_avg_speed%60)) + ' sec'
pdf.cell(20, 15, txt=average_speed, border=0, ln=2, align='L')
pdf.set_xy(0, 30)
pdf.image('work_speed.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
# Average number of objects per image (label only; no value is filled in)
pdf.set_font('arial', '', 12)
average_number_of_objects = 'Average Number of Objects per image: '
pdf.set_xy(20, 100)
pdf.cell(20, 15, txt=average_number_of_objects, border=0, ln=2, align='L')
# Ban rate (labels only)
pdf.set_font('arial', '', 12)
ban_rate = 'Ban Rate: '
ban_count = 'Ban Count: '
pdf.set_xy(20, 120)
pdf.cell(0, 5, txt=ban_rate, border=0, ln=2, align='L')
pdf.cell(0, 5, txt=ban_count, border=0, ln=2, align='L')
# Number of CS (labels only)
pdf.set_font('arial', '', 12)
cs_rate = 'Number of CS per worker: '
cs_count = 'CS Count: '
pdf.set_xy(20, 140)
pdf.cell(0, 5, txt=cs_rate, border=0, ln=2, align='L')
pdf.cell(0, 5, txt=cs_count, border=0, ln=2, align='L')
# Worker, part 2
pdf.set_xy(20, 540)
pdf.set_font('arial', 'B', 16)
pdf.cell(0, 10, "3. Worker", 0, 2, 'L')
pdf.set_xy(0, 20)
pdf.image('active_worker_count.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
pdf.set_xy(0, 90)
pdf.image('daily_average_task_count_by_worker.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
# 3. Worker
pdf.set_xy(20, 540)
pdf.set_font('arial', 'B', 16)
pdf.cell(0, 10, "3. Worker", 0, 2, 'L')
pdf.set_font('arial', '', 12)
pdf.set_xy(20, 25)
pdf.cell(0, 5, "Average Hours Worked per Day: ", 0, 2, 'L')
pdf.cell(0, 5, "Active Worker Number: ", 0, 2, 'L')
pdf.set_xy(0, 40)
pdf.image('num_task_completed.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
# Rework rate
pdf.set_xy(0, 110)
pdf.image('rework_rate.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
pdf.set_xy(20, 180)
pdf.cell(0, 5, txt=total_re_work_rate, border=0, ln=2, align='L')
pdf.set_xy(0, 190)
pdf.image('check_reject_distribution.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
# Checker
pdf.set_xy(20, 810)
pdf.set_font('arial', 'B', 16)
pdf.cell(0, 10, "4. Checker", 0, 2, 'L')
# Platform
pdf.set_xy(20, 1090)
pdf.set_font('arial', 'B', 16)
pdf.cell(0, 10, "5. Platform", 0, 2, 'L')
pdf.set_font('arial', '', 12)
pdf.set_xy(20, 25)
pdf.cell(0, 10, txt='Salary Comparison', border=0, ln=2, align='L')
pdf.set_xy(20, 40)
pdf.cell(0, 10, txt='Worker Exit Tracker', border=0, ln=2, align='L')
pdf.set_xy(0, 50)
pdf.image('worker_tracker_pie.jpg', x = None, y = None, w = 200, h = 0, type = '', link = '')
pdf.output('testing.pdf', 'F')
''